For more details, see Software and Package Versions.
Run drop down (top right of the
code pane) and click Run Allknit (top left of code
pane) and a file will be generated in docs/index.htmlInstall R packages if needed.
# Required packages
required_packages <- c(
"rmarkdown",
"bookdown",
"knitr",
"tidyverse",
"glue",
"readxl",
"ggtext",
"scales",
"patchwork",
"DiagrammeR",
"DiagrammeRsvg",
"webshot2",
"magick",
"rsvg",
"sf",
"tmap",
"ggspatial",
"prettymapr",
"units"
)
# Try to install packages if not installed
default_options <- options()
tryCatch(
{
# Disable interactivity
options(install.packages.compile.from.source = "always")
# Install package if not installed
for (package in required_packages) {
is_package_installed <- require(package, character.only = TRUE)
if (!is_package_installed & package != "osmplotr") {
cat(paste0("Installing package: ", package, "\n"))
install.packages(package)
} else {
cat(paste0("Package already installed: ", package, "\n"))
}
}
},
error = function(cond) {
stop(cond)
},
finally = {
options(default_options) # reset interactivity
}
)Load R libraries.
settings <- list()
# Infrastructure types in order
settings$type_recode_infra <- c(
PBL = "Cycle Track",
BUF = "Buffered Lane",
PL = "Painted Lane",
LSB = "Local Street\nBikeway"
)
# Infrastructure types to remove
settings$type_filter_infra <- c("N", "None", "SR")
# Road types in order
settings$type_recode_road <- c(
Arterial = "Arterial",
Collector = "Collector",
Local = "Local"
)
# Column references
settings$year_col_road <- "verify_install_year"
settings$type_col_road <- "road_type_recode"
settings$type_col_infra <- "verify_install_type"
# Set years of interest
settings$year_min <- 2009
settings$year_max <- 2022
# Plot settings
settings$line_year <- 2019
settings$basemaps <- c(
"CartoDB.Positron",
"CartoDB.DarkMatter",
"Esri.WorldGrayCanvas"
)
# Map infrastructure changes since year
settings$infra_changes_year <- 2020
# Apply map settings
tmap_options(basemaps = settings$basemaps)Calculate yearly road lengths.
The following function calculates yearly road lengths by infrastructure type using cumulative sums and filling in missing years and types.
For a given infrastructure type, the total road length for a given year is expressed below:
\[ length_{year,type} = f(year,type) = \sum_{i=year_{min}}^{year}l_{i, type}\ \mid\ l_{i, type} \geq 0 \]
Where:
#' Calculate Yearly Road Lengths By Infrastructure Type
#'
#' Calculates the cumulative yearly road lengths by infrastructure type without considering infrastructure changes.
#'
#' @param df A data.frame with three columns containing the year, type, and road lengths.
#' @param year_col The name (char) or index (int) of the column containing the years.
#' @param type_col The name (char) or index (int) of the column containing the infrastructure type
#' @param len_col The name (char) or index (int) of the column containing the road lengths.
#' @param out_col The name (char) of the column containing the calculated yearly road lengths by type.
#'
#' @return A data.frame with three columns containing the year, type, and calculated yearly road lengths by type.
#' @export
#'
calc_yearly_len <- function(
df,
year_col = "verify_install_year",
type_col = "verify_install_type",
len_col = "geometry_len_km",
out_col = "len",
year_min = settings$year_min,
year_max = settings$year_max
) {
# Convert data types
df <- as.data.frame(df)
df[[year_col]] <- as.integer(df[[year_col]])
df[[type_col]] <- as.character(df[[type_col]])
df[[len_col]] <- as.numeric(df[[len_col]])
# Remove rows with empty type
out <- df %>% filter(
!is.na(.data[[type_col]])
)
# Filter to min and max years
if (year_min > 0) {
df <- df %>% filter(
.data[[year_col]] >= year_min
)
} else {
year_min <- min(out[[year_col]], na.rm = TRUE)
}
if (year_max > 0) {
df <- df %>% filter(
.data[[year_col]] <= year_max
)
} else {
year_max <- max(out[[year_col]], na.rm = TRUE)
}
# Add dummy len for each type and year combo
# Covers cases where type and year combo does not exist
# E.g. No new PL installs in 2021, hence a record PL in 2021 does not exist
type_uniq <- unique(out[[type_col]])
type_n <- length(type_uniq)
year_uniq <- year_min:year_max
year_n <- length(year_uniq)
out <- out %>% add_row(
!!year_col := rep(year_uniq, each = type_n),
!!type_col := rep(type_uniq, year_n),
!!len_col := rep(0, type_n * year_n)
)
# Calc cumsum for each non-empty type ordered by year
out <- out %>%
arrange(.data[[year_col]]) %>%
group_by(.data[[type_col]]) %>%
mutate(
!!out_col := cumsum(.data[[len_col]])
)
# Get the last cumsum for each year and type
out <- out %>%
group_by(.data[[year_col]], .data[[type_col]]) %>%
arrange(desc(row_number())) %>%
slice(1)
# Return only the columns spec
out <- out %>% select(c(
year_col,
type_col,
out_col
))
return(out)
}Calculate yearly adjusted road length.
The following function calculates yearly adjusted road lengths by infrastructure type using cumulative sums and filling in missing years and types.
For a given infrastructure type, the total adjusted road length for a given year is expressed below:
\[ length_{year,type}^{install} + length_{year,type}^{change_i} - length_{year,type}^{replacement_i} \] Where:
#' Calculate Yearly Adjusted Road Lengths By Infrastructure Type
#'
#' Calculates the cumulative yearly adjusted road lengths by infrastructure type accounting for installations and subsequent changes.
#'
#' @param df A data.frame with three columns containing the year, type, and road lengths.
#' @param year_cols A vector of the names (char) or indices (int) of the columns containing the years of installations followed by infrastructure changes in order.
#' @param type_cols A vector of the names (char) or indices (int) of the columns containing the infrastructure types of installations followed by infrastructure changes in order.
#' @param type_col The name (char) of the column containing the type.
#' @param len_cols A vector of the names (char) or indices (int) of the columns containing the road lengths of installations followed by infrastructure changes in order.
#' @param out_cols The name (char) of the column containing the calculated yearly road lengths by type.
#' @param out_col The name (char) of the column containing the calculated yearly adjusted road lengths by type.
#' @param repl_suffix A suffix (char) to append to the columns representing the road lengths of replaced infrastructure types from changes.
#' @param ... Additional arguments passed to calc_yearly_len.
#'
#' @return A data.frame with columns containing the year, type, cumulative road lengths of installations, changes, and replacements, and calculated yearly adjusted road lengths by type.
#' @export
#'
calc_yearly_adj_len <- function(
df,
year_cols = c("verify_install_year", "verify_upgrade1_year", "verify_upgrade2_year"),
type_cols = c("verify_install_type", "verify_upgrade1_type", "verify_upgrade2_type"),
type_col = "type",
len_cols = "geometry_len_km",
out_cols = c("install_len", "upgrade1_len", "upgrade2_len"),
out_col = "adj_len",
repl_suffix = "_replaced",
...
) {
# Ensure df
df <- as.data.frame(df)
# Convert len_col if char
len_cols <- rep(len_cols, length(year_cols))
# Check cols same size
year_cols_n <- length(year_cols)
type_cols_n <- length(type_cols)
len_cols_n <- length(len_cols)
out_cols_n <- length(out_cols)
if (length(unique(c(year_cols_n, type_cols_n, len_cols_n, out_cols_n))) != 1) {
stop(glue(
"The arguments 'year_cols' ({year_cols_n}), 'type_cols' ({type_cols_n}), 'len_cols' ({len_cols_n}), and 'out_cols' ({out_cols_n}) must be the same length."
))
}
# Calc yearly lens by infra type per install or change
out <- list()
for (i in 1:length(year_cols)) {
# Get year, type, and len cols
ycol <- year_cols[[i]]
tcol <- type_cols[[i]]
lcol <- len_cols[[i]]
ocol <- out_cols[[i]]
# Calc yearly len for install or change
has_infra <- !is.na(df[[tcol]]) %>% all
if (has_infra) {
out <- append(
out,
calc_yearly_len(
df,
year_col = ycol,
type_col = tcol,
len_col = lcol,
out_col = ocol,
...
) %>%
rename(
"year" := !!ycol,
"type" := !!tcol
) %>% list
)
}
# Calc yearly len for replacement
if (i > 1) {
# Get repl cols
tcol_repl <- type_cols[[i - 1]]
lcol_repl <- len_cols[[i - 1]]
# Filter for repl records only where type is not eq to change type
df_repl <- df %>% filter(.data[[tcol]] != .data[[tcol_repl]])
# Calc repl len if there are any changes
has_change <- !is.na(df_repl[[tcol]]) %>% all
if (has_change) {
out <- append(
out,
calc_yearly_len(
df_repl,
year_col = ycol,
type_col = tcol_repl,
len_col = lcol_repl,
out_col = glue("{ocol}{repl_suffix}"),
...
) %>%
rename(
"year" := !!ycol,
"type" := !!tcol_repl
) %>% list
)
}
}
}
# Combine all lens in list to single df
out <- out %>%
reduce(
left_join, by = c("year", "type")
) %>%
ungroup()
# Create template for change and repl cols
change_cols <- paste0(out_cols[2:out_cols_n])# change cols
change_cols <- c(change_cols, paste0(out_cols[2:out_cols_n], repl_suffix)) # repl cols
change_cols_add <- rep(0, length(change_cols)) # set default vals
names(change_cols_add) <- change_cols
# Add change and repl cols set to 0 if not present
out <- out %>% add_column(
!!!change_cols_add[setdiff(names(change_cols_add), names(.))]
)
# Set NA to 0
out <- out %>% mutate(
across(everything(), ~replace_na(., 0))
)
# Calc yearly adj lens by infra type
out <- out %>%
mutate( # added len by infra types due to install or changes
!!out_col := reduce(across(all_of(out_cols)), `+`)
) %>%
mutate( # removed len by infra types due to replacements
!!out_col := .data[[out_col]] - reduce(
across(all_of(
paste0(out_cols[2:out_cols_n], repl_suffix)
)),
`-`
)
)
# Rename type col
out <- out %>% rename(!!type_col := type)
return(out)
}Plot road lengths by generic types.
This function plots an area chart showing the cumulative road lengths by a user-defined type for each year.
This is a generic function for user-defined types such as infrastructure or road types.
#' Plot Yearly Road Lengths By Type
#'
#' Creates an area plot of road lengths by category types.
#'
#' @param df A data.frame with three columns containing the year, type, and road lengths.
#' @param title The title (char) of the plot.
#' @param title_underline Set to TRUE to underline the title.
#' @param x_title The title (char) of the x-axis.
#' @param y_title The title (char) of the y-axis.
#' @param y_suffix The suffix (char) to add to the end of y axis values.
#' @param legend_title The title (char) of the legend.
#' @param legend Set to TRUE to include a legend.
#' @param year_col The name (char) or index (int) of the column containing the years.
#' @param year_min The minimum year (int) to display.
#' @param year_max The maximum year (int) to display.
#' @param year_int The year intervals (int) to display. For example, 1 displays every year, and 2 displays every two years.
#' @param len_col The name (char) or index (int) of the column containing the road lengths.
#' @param type_col The name (char) or index (int) of the column containing the type.
#' @param type_filter A vector (char) of types to remove fomr the plot.
#' @param type_recode A named vector (char) of names representing types and values representing the values to replace each type with.
#' @param line_50km Set to TRUE to draw the 50 km red reference line.
#' @param line_year Set to a year (int) to draw a reference line for a year. If FALSE, a line will not be drawn.
#' @param color_low The bottom color (char) of the type.
#' @param color_high The top color (char) of the type.
#' @return An area ggplot of the cumulative yearly road lengths by type.
#' @export
#'
plot_yearly_len <- function(
df,
title = "",
title_underline = TRUE,
x_title = "",
y_title = "",
y_suffix = " km",
legend_title = "Type",
legend = TRUE,
year_col = "year",
year_min = FALSE,
year_max = FALSE,
year_int = 1,
len_col = "adj_len",
type_col = "type",
type_filter = c(),
type_recode = c(),
line_50km = FALSE,
line_year = FALSE,
color_low = "#DFEBF7",
color_high = "#3683BB"
) {
# Filter to start and end years
if (year_min > 0) {
df <- df %>% filter(
.data[[year_col]] >= year_min
)
}
if (year_max > 0) {
df <- df %>% filter(
.data[[year_col]] <= year_max
)
}
# Filter out particular infrastructure types
if (length(type_filter) > 0) {
df <- df %>% filter(
!.data[[type_col]] %in% type_filter
)
}
# Recode and reorder category types
if (length(type_recode) > 0) {
# Reorder category types
type_uniq <- unique(df[[type_col]])
type_reorder <- names(type_recode)
type_reorder <- c(type_reorder, type_uniq[!type_uniq %in% type_reorder])
df[[type_col]] <- factor(df[[type_col]], levels = type_reorder)
# Recode category types
df[[type_col]] <- recode(df[[type_col]], !!!type_recode)
}
# Create fill colors
type_n <- length(type_uniq)
type_colors <- scales::seq_gradient_pal(
color_low,
color_high
)(seq(0, 1, length.out = type_n))
# Create base area plot with legend and labels
len_max <- max(df[[len_col]], na.rm = TRUE)
year_max <- max(df[[year_col]], na.rm = TRUE)
out <- ggplot(
df,
aes(
x = .data[[year_col]],
y = .data[[len_col]],
fill = .data[[type_col]],
order = desc(.data[[type_col]])
)
) +
geom_area(colour = NA, alpha = 0.7) +
scale_fill_manual(values = type_colors) +
geom_line(
position = "stack",
size = 0.2
) +
labs(
x = x_title,
y = y_title,
fill = legend_title
) +
guides(
fill = FALSE,
color = FALSE
) +
scale_x_continuous(
breaks = seq(year_min, year_max, by = year_int),
labels = seq(year_min, year_max, by = year_int),
limits = c(year_min, year_max)
) +
scale_y_continuous(
label = scales::label_number(suffix = y_suffix)
) +
theme_minimal() +
theme(
plot.margin = unit(c(5,5,5,5), "points")
)
# Add title
if (title_underline) {
out <- out + ggtitle(
bquote(underline(.(title)))
)
} else {
out <- out + ggtitle(title)
}
# Add legend
if (legend) {
out <- out + guides(fill = guide_legend(
reverse = FALSE,
override.aes = list(
alpha = 0.7,
color = NA,
shape = NA
)
))
}
# Add dotted year ref line
if (line_year) {
out <- out + geom_vline(
xintercept = line_year,
color = "black",
linetype = "dashed"
)
}
# Add red 50km ref line
if (line_50km) {
out <- out + geom_segment( # 50km red line
aes(
x = 2009,
y = 0,
xend = 2009,
yend = 50,
color = "#bb0000"
)
) +
geom_segment( # 50km red triangle point down
aes(
x = 2009,
y = 50.01 - (len_max * 0.05),
xend = 2009,
yend = 50 - (len_max * 0.05),
color = "#bb0000"
),
arrow = arrow(
length = unit(0.03, "npc"),
ends = "last",
type = "closed"
)
) +
geom_segment( # 50km red triangle point up
aes(
x = 2009,
y = (len_max * 0.05) - 0.01,
xend = 2009,
yend = (len_max * 0.05),
color = "#bb0000"
),
arrow = arrow(
length = unit(0.03, "npc"),
ends = "last",
type = "closed"
)
) +
annotate(
"text",
x = 2009,
y = 50,
label = "50km",
color = "#bb0000",
hjust = -0.225
)
}
return(out)
}Plot yearly adjusted road lengths by infrastructure type.
This function plots area charts of yearly road lengths by infrastructure types for a list of data.
This uses the plot_yearly_len function.
#' Plot Yearly Road Lengths By Infrastructure Type
#'
#' Creates area plots of road lengths by infrastructure type.
#'
#' @param df_list A list of data.frame containing the install and change years, type, and road segment lengths.
#' @return Multiple area ggplots of the cumulative yearly road lengths by infrastructure type combined with patchwork.
#' @export
#'
plot_yearly_len_infra <- function(df_list) {
# Create infra plots from data
p <- list()
for (i in 1:length(df_list)) {
# Get data and plot title
df <- df_list[[i]]
ptitle <- names(df_list)[[i]]
# Create and add infra plot to list
p[[i]] <- calc_yearly_adj_len(df, type_col = settings$type_col_infra) %>%
plot_yearly_len(
title = ptitle,
year_min = settings$year_min,
year_max = settings$year_max,
type_col = settings$type_col_infra,
type_filter = settings$type_filter_infra,
type_recode = settings$type_recode_infra,
legend_title = "Infrastructure Type",
line_50km = TRUE,
line_year = settings$line_year
)
}
# Y-axis title
y_title <- ggplot() +
annotate(
geom = "text",
x = 1,
y = 1,
label = "Total Length (Centreline km)",
angle = 90,
size = 5
) +
coord_cartesian(clip = "off")+
theme_void()
# Combine all infra plots together
out <- (y_title | wrap_plots(p, nrow = length(p))) +
plot_annotation(
title = "Roadways with Dedicated Cycling Infrastructure",
caption = sprintf("Years (%s-%s)", settings$year_min, settings$year_max),
theme = theme(
plot.title = element_text(hjust = 0.5, size = 16),
plot.caption = element_text(hjust = 0.5, size = 14)
)
) +
plot_layout(widths = c(0.05, 1))
return(out)
}Plots yearly adjusted road lengths for road types.
This function plots area charts of yearly road lengths by overall road type and by infrastructure separated by each road type.
This uses the plot_yearly_len function.
#' Plot Yearly Road Lengths By Road Type
#'
#' Creates area plots of road lengths by overall road type, and by infrastructure per road type.
#'
#' @param df The data.frame containing the install and change years, type, and road segment types and lengths.
#' @return Multiple area ggplots of the cumulative yearly road lengths by road type combined with patchwork.
#' @export
#'
plot_yearly_len_road <- function(df, title = "Roadways with Dedicated Cycling Infrastructure") {
# Create list to store plots
p <- list()
# Plot overall road types
p[[1]] <- calc_yearly_len(
df,
year_col = settings$year_col_road,
type_col = settings$type_col_road
) %>%
plot_yearly_len(
title = title,
title_underline = FALSE,
year_col = settings$year_col_road,
year_min = settings$year_min,
year_max = settings$year_max,
x_title = sprintf("Years (%s-%s)", settings$year_min, settings$year_max),
y_title = "Total Length (Centreline km)",
legend_title = "Roadway Type",
type_col = settings$type_col_road,
type_recode = settings$type_recode_road,
len_col = "len",
line_50km = FALSE,
line_year = settings$line_year,
color_low = "#C1DDB3",
color_high = "#297A22"
) +
theme(
plot.title = element_text(size = 18),
plot.margin = margin(0, 0, 0, 0, "pt")
)
# Plot arterial, collector, and local road by infra
rtypes <- c("Arterial", "Collector", "Local")
for (i in 1:length(rtypes)) {
# Get road type
r <- rtypes[i]
# Create infra plot for road type
p[[i + 1]] <- calc_yearly_adj_len(
df %>% filter(.data[[settings$type_col_road]] == r),
type_col = settings$type_col_infra
) %>%
plot_yearly_len(
title = sprintf("%s Roadways", r),
title_underline = FALSE,
line_50km = FALSE,
line_year = settings$line_year,
year_int = 2,
x_title = sprintf("Years (%s-%s)", settings$year_min, settings$year_max),
y_title = "Total Length (Centreline km)",
year_min = settings$year_min,
year_max = settings$year_max,
type_col = settings$type_col_infra,
type_filter = settings$type_filter_infra,
type_recode = settings$type_recode_infra,
legend_title = "Infrastructure Type"
) +
theme(
plot.title = element_text(size = 14),
plot.margin = margin(0, 12, 0, 0, "pt")
)
}
# Plot horizontal gradient bar
grad_bar <- ggplot(data.frame(x = 1:4), aes(x = x, y = 1, color = x)) +
geom_line(size = 4) +
scale_color_gradient(low = "#C1DDB3", high = "#297A22") +
theme_void() +
guides(color = FALSE) +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank(),
plot.margin = margin(0, 0, 0, 0, "pt")
)
# Plot overall and road type plots together
out <- ( # overall plot
plot_spacer() +
p[[1]] +
plot_spacer() +
plot_layout(
widths = c(0.25, 0.35, 0.2)
)
) / ( # gradient bar
plot_spacer() +
grad_bar +
plot_spacer() +
plot_layout(widths = c(-0.8, 10, -1.1))
) / ( # infra plots
p[[2]] +
p[[3]] +
p[[4]]
) + plot_layout(
heights = c(12, 1, 8)
) + plot_annotation( # A B tags
tag_levels = list(c("A", "", "B", "", ""))
) & theme(
plot.tag = element_text(face = "bold", size = 12)
)
return(out)
}Plots differences between two years.
This function plots a bar chart of differences between two columns containing years.
This function is used to check the differences in installation years between the city’s data and the verified data.
#' Plot Yearly Differences
#'
#' Creates a bar plot of the differences between two years.
#'
#' @param df The data.frame containing the two columns with the years.
#' @param year_col1 The name (char) or index (int) of the first year column.
#' @param year_col2 The name (char) or index (int) of the second year column to be subtracted from.
#' @param year_col1_name The name alias (char) of the first year column year_col1.
#' @param year_col2_name The name alias (char) of the second year column year_col2.
#' @param year_min The minimum year (int) to calculate differences for.
#' @param year_max The maximum year (int) to calculate differences for.
#' @param title The title (char) of the plot.
#' @param title_n Set to TRUE to add the number of total segments considered.
#' @param x_title The title (char) of the x-axis.
#' @param y_title The title (char) of the y-axis.
#' @param x_breaks The number (int) of breaks to show on the x-axis. Set to FALSE to let ggplot automatically decide.
#' @param x_perc Set to TRUE to show proportions and FALSE to show counts.
#' @param out_data Set to TRUE to return a list
#'
#' @return A ggplot of yearly differences (year_col2 - year_col1), displaying the proportion of rows for each difference in years. If `out_data` is TRUE then returns a list with keys `data` representing the data used for plotting and `plot` with the ggplot object.
#' @export
#'
plot_yearly_diff <- function(
df,
year_col1 = "install_year",
year_col2 = "verify_install_year",
year_col1_name = "City Year",
year_col2_name = "Verified Year",
year_min = settings$year_min,
year_max = settings$year_max,
title = sprintf(
"Difference in Years, Comparing %s and %s",
year_col1_name,
year_col2_name
),
title_n = TRUE,
x_title = sprintf(
"Difference in Years (%s - %s)",
year_col2_name,
year_col1_name
),
y_title = "Proportion of Total Segments",
x_breaks = 15,
x_perc = TRUE,
out_data = FALSE
) {
# Filter for comparable rows only
pdata <- df %>% filter(
!is.na(.data[[year_col1]]) & !is.na(.data[[year_col2]])
)
# Filter within min year
if (year_min) {
pdata <- pdata %>% filter(
.data[[year_col1]] >= year_min | .data[[year_col2]] >= year_min
)
}
# Filter within max year
if (year_max) {
pdata <- pdata %>% filter(
.data[[year_col1]] <= year_max | .data[[year_col2]] <= year_max
)
}
# Add n to title
if (title_n) {
title <- sprintf("%s (n=%s)", title, nrow(pdata))
}
# Calc yearly diff
pdata <- pdata %>%
mutate(year_diff = .data[[year_col2]] - .data[[year_col1]]) %>%
count(year_diff) %>%
mutate(n_perc = (n / sum(n)) * 100)
# Set to proportions or counts
pdata$y <- if (x_perc) pdata$n_perc else pdata$n
# Plot yealy diffs
out <- pdata %>%
ggplot(aes(
x = year_diff,
y = y
)) +
geom_bar(
stat = "identity",
color = "#332a94",
fill = "#c3d5e4",
width = 1
) +
labs(
title = title,
x = x_title,
y = y_title
) +
theme(
plot.title = element_text(size = 12)
)
# Add percentage sign if percentages
if (x_perc) {
out <- out +
scale_y_continuous(
label = scales::label_number(suffix = "%")
)
}
# Set x interval breaks
if (x_breaks) {
out <- out + scale_x_continuous(
breaks = scales::breaks_pretty(x_breaks)
)
}
# Returns ggplot obj or list
if (out_data) {
out <- list(
data = pdata,
plot = out
)
}
return(out)
}Fitler for segment inclusion criteria
This function applies segment inclusion critieria to a list of data.frames. Optionally creates a data.frame of counts, segment lengths, and other exclusions (duplicates, misclassifications) per inclusion criteria step along with a list of the data.frames after applying the inclusion criteria.
#' Filter for Segment Inclusion Criteria
#'
#' This function applies segment inclusion critieria to a list of data.frames. Optionally creates a data.frame of counts, segment lengths, and other exclusions (duplicates, misclassifications) per inclusion criteria step along with a list of the data.frames after applying the inclusion criteria.
#'
#' @param criteria_data A list of lists, where each list contains the following structure defining the segment inclusion criteria for each city:
#' \itemize{
#' \item \code{city}: the name (char) of the city (required).
#' \item \code{data}: the data.frame containing road segments and applicable columns for inclusion criteria filtering (required).
#' \item \code{data_date}: the date (char) that the data was acquired.
#' \item \code{infra_col}: the column name (char) of the column containing the dedicated cycling infrastructure types to filter.
#' \item \code{infra_filter}: the vector of characters of dedicated cycling infrastructure types to include.
#' \item \code{road_col}: the column name (char) of the column containing the road location types to filter.
#' \item \code{road_filter}: the vector of characters of road location types to exclude.
#' \item \code{status_col}: the column name (char) of the column containing the inactive road status types to filter.
#' \item \code{status_filter}: the vector of characters of inactive road status types to include.
#' \item \code{geom_col}: the column name (char) of the column containing geometries.
#' \item \code{geom_unit}: the unit measure (char) of the geometry
#' \item \code{geom_filter}: Set to TRUE to filter for null and duplicate geometries.
#' \item \code{misclass_col}: the column name (char) of the column containing misclassification types to filter.
#' \item \code{misclass_filter}: the vector of characters indicating non-misclassified rows of data to include. Usually set to c("NA", NA) to indicate that the row is not misclassified.
#' \item \code{noverify_col}: the column containing infrastructure install types (char) that are not verified. This does not filter the data, but calculates and adjusts for the rows and road lengths of non-verified segments.
#'. \item \code{noverify_filter}: the vector of characters of non-verified infrastructure install types from the city. This does not filter the data, but calculates and adjusts for the rows and road lengths of non-verified segments.
#' }
#' @param len_func A function to apply to road length calculations. The default is a function that converts from meters to km.
#'
#' @return A list of lists, where each list has keys and values from \code{criteria_data}, and the following additional keys:
#' \itemize{
#' \item \code{data_filter}: the data.frame after filtering for segment inclusion criteria (required).
#' \item \code{infra_filter_applied}: TRUE if dedicated cycling infrastructure filter was applied and FALSE otherwise (required).
#' \item \code{infra_filter_n}: total rows (numeric) after filtering for dedicated cycling infrastructure using \code{infra_filter} (required).
#' \item \code{infra_filter_len}: total road length (numeric) after filtering for dedicated cycling infrastructure using \code{infra_filter} (required).
#' \item \code{infra_filter_nx}: total rows (numeric) affected by filtering for dedicated cycling infrastructure using \code{infra_filter} (required).
#' \item \code{infra_filter_lenx}: total road length (numeric) affected by filtering for dedicated cycling infrastructure using \code{infra_filter} (required).
#' \item \code{road_filter_applied}: TRUE if road location filter was applied and FALSE otherwise (required).
#' \item \code{road_filter_n}: total rows (numeric) after filtering for road location using \code{infra_filter} (required).
#' \item \code{road_filter_len}: total road length (numeric) after filtering for road location using \code{infra_filter} (required).
#' \item \code{road_filter_nx}: total rows (numeric) affected by filtering for road location using \code{infra_filter} (required).
#' \item \code{road_filter_lenx}: total road length (numeric) affected by filtering for road location using \code{infra_filter} (required).
#' \item \code{status_filter_applied}: TRUE if inactive road status filter was applied and FALSE otherwise (required).
#' \item \code{status_filter_n}: total rows (numeric) after filtering for inactive road status using \code{status_filter} (required).
#' \item \code{status_filter_len}: total road length (numeric) after filtering for inactive road status using \code{status_filter} (required).
#' \item \code{status_filter_nx}: total rows (numeric) affected by filtering for inactive road status using \code{status_filter} (required).
#' \item \code{status_filter_lenx}: total road length (numeric) affected by filtering for inactive road status using \code{status_filter} (required).
#' \item \code{geom_filter_null_applied}: TRUE if null geometries filter was applied and FALSE otherwise (required).
#' \item \code{geom_filter_null_n}: total rows (numeric) after filtering for null geometries (required).
#' \item \code{geom_filter_null_len}: total road length (numeric) after filtering for null geometries (required).
#' \item \code{geom_filter_null_nx}: total rows (numeric) affected by filtering for null geometries (required).
#' \item \code{geom_filter_null_lenx}: total road length (numeric) affected by filtering for null geometries (required).
#' \item \code{geom_filter_dup_applied}: TRUE if duplicate geometries filter was applied and FALSE otherwise (required).
#' \item \code{geom_filter_dup_n}: total rows (numeric) after filtering for duplicate geometries (required).
#' \item \code{geom_filter_dup_len}: total road length (numeric) after filtering for duplicate geometries (required).
#' \item \code{geom_filter_dup_nx}: total rows (numeric) affected by filtering for duplicate geometries (required).
#' \item \code{geom_filter_dup_lenx}: total road length (numeric) affected by filtering for duplicate geometries (required).
#' \item \code{elig_n}: total rows (numeric) after the above filters eligible for data entry and screening (required).
#' \item \code{elig_len}: total road length (numeric) after the above filters eligible for data entry and screening (required).
#' \item \code{misclass_filter_applied}: TRUE if null misclassifications filter was applied and FALSE otherwise (required).
#' \item \code{misclass_filter_n}: total rows (numeric) after filtering misclassifications using \code{misclass_filter} (required).
#' \item \code{misclass_filter_len}: total road length (numeric) after filtering misclassifications using \code{misclass_filter} (required).
#' \item \code{misclass_filter_nx}: total rows (numeric) affected by filtering misclassifications using \code{misclass_filter} (required).
#' \item \code{misclass_filter_lenx}: total road length (numeric) affected by misclassifications using \code{misclass_filter} (required).
#' \item \code{misclass_filter_uniq_n}: a data.frame of total rows for each misclassification type.
#' \item \code{misclass_filter_uniq_len}: a data.frame of total road lengths for each misclassification type.
#' \item \code{noverify_filter_applied}: TRUE if non-verified infrastructure filter was calculated and FALSE otherwise (required).
#' \item \code{noverify_filter_nx}: total rows (numeric) of non-verified infrastructure from \code{noverify_filter} (required).
#' \item \code{noverify_filter_lenx}: total road length (numeric) affected by non-verified infrastructure using \code{noverify_filter} (required).
#' \item \code{incl_n}: final total rows (numeric) after the above filters (required).
#' \item \code{incl_len}: final total road length (numeric) after the above filters (required).
#' }
#' @export
#'
filter_criteria <- function(
criteria_data,
len_func = function (x) as.numeric(x) / 1000
) {
# Apply criteria to list and track counts and lengths
out <- criteria_data
for (i in 1:length(criteria_data)) {
# Get criteria data
x <- criteria_data[[i]]
df <- x$data
city <- x$city
# Set initial apply status for filters
out[[city]]$infra_filter_applied <- FALSE
out[[city]]$road_filter_applied <- FALSE
out[[city]]$status_filter_applied <- FALSE
out[[city]]$geom_filter_null_applied <- FALSE
out[[city]]$geom_filter_dup_applied <- FALSE
out[[city]]$misclass_filter_applied <- FALSE
out[[city]]$noverify_filter_applied <- FALSE
# Count/len initial
out[[city]]$data_n <- nrow(df)
out[[city]]$data_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE))
# Filter for dedicated cycling infra
if (all(c("infra_col", "infra_filter") %in% names(x))) {
# Apply ded cyc infra filter
df <- df %>%
filter(.data[[x$infra_col]] %in% x$infra_filter)
# Set ded cyc infra filter status
out[[city]]$infra_filter_applied <- TRUE
}
# Count/len ded cyc infra filter
out[[city]]$infra_filter_n <- nrow(df)
out[[city]]$infra_filter_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE))
# Count/len affected by ded cyc infra filter
out[[city]]$infra_filter_nx <- out[[city]]$data_n - out[[city]]$infra_filter_n
out[[city]]$infra_filter_lenx <- out[[city]]$data_len - out[[city]]$infra_filter_len
# Filter for road location
if (all(c("road_col", "road_filter") %in% names(x))) {
# Apply road filter
df <- df %>%
filter(!.data[[x$road_col]] %in% x$road_filter)
# Set road filter status
out[[city]]$road_filter_applied <- TRUE
}
# Count/len road filter
out[[city]]$road_filter_n <- nrow(df)
out[[city]]$road_filter_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE))
# Count/len affected by road filter
out[[city]]$road_filter_nx <- out[[city]]$infra_filter_n - out[[city]]$road_filter_n
out[[city]]$road_filter_lenx <- out[[city]]$infra_filter_len - out[[city]]$road_filter_len
# Filter for status
if (all(c("status_col", "status_filter") %in% names(x))) {
# Apply status filter
df <- df %>%
filter(!.data[[x$status_col]] %in% x$status_filter)
# Set status filter status
out[[city]]$status_filter_applied <- TRUE
}
# Count/len status filter
out[[city]]$status_filter_n <- nrow(df)
out[[city]]$status_filter_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE))
# Count/len affected by status filter
out[[city]]$status_filter_nx <- out[[city]]$road_filter_n - out[[city]]$status_filter_n
out[[city]]$status_filter_lenx <- out[[city]]$road_filter_len - out[[city]]$status_filter_len
# Filter for null geoms
if (all(c("geom_col", "geom_filter") %in% names(x))) {
# Apply null geom filter
df <- df %>%
filter(!is.na(.data[[x$geom_col]]))
# Set dup geom filter status
out[[city]]$geom_filter_null_applied <- TRUE
}
# Count/len null geom filter
out[[city]]$geom_filter_null_n <- nrow(df)
out[[city]]$geom_filter_null_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE))
# Count/len affected by null geom filter
out[[city]]$geom_filter_null_nx <- out[[city]]$status_filter_n - out[[city]]$geom_filter_null_n
out[[city]]$geom_filter_null_lenx <- out[[city]]$status_filter_len - out[[city]]$geom_filter_null_len
# Filter for dup geoms
if (all(c("geom_col", "geom_filter") %in% names(x))) {
# Apply dup geom filter
df <- df %>%
distinct(.data[[x$geom_col]], .keep_all = TRUE)
# Set dup geom filter status
out[[city]]$geom_filter_dup_applied <- TRUE
}
# Count/len dupl geom filter
out[[city]]$geom_filter_dup_n <- nrow(df)
out[[city]]$geom_filter_dup_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE))
# Count/len affected by dupl geom filter
out[[city]]$geom_filter_dup_nx <- out[[city]]$geom_filter_null_n - out[[city]]$geom_filter_dup_n
out[[city]]$geom_filter_dup_lenx <- out[[city]]$geom_filter_null_len - out[[city]]$geom_filter_dup_len
# Calculate noverify segments
if (all(c("noverify_col", "noverify_filter") %in% names(x))) {
# Apply noverify filter separately
df_noverify <- df %>%
filter(!is.na(.data[[x$noverify_col]]))
# Set noverify filter status
out[[city]]$noverify_filter_applied <- TRUE
# Count/len of noverify segments
out[[city]]$noverify_filter_nx <- df_noverify %>% nrow
out[[city]]$noverify_filter_lenx <- len_func(sum(st_length(df_noverify[[x$geom_col]]), na.rm = TRUE))
} else {
# Set to 0 if all segments are verified
out[[city]]$noverify_filter_nx <- len_func(as_units(0, "meters"))
out[[city]]$noverify_filter_lenx <- len_func(as_units(0, "meters"))
}
# Count/len eligible
out[[city]]$elig_n <- nrow(df) - out[[city]]$noverify_filter_nx
out[[city]]$elig_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE)) - out[[city]]$noverify_filter_lenx
# Filter for misclass
if (all(c("misclass_col", "misclass_filter") %in% names(x))) {
# Count/len misclass groups
out[[city]]$misclass_filter_uniq_n <- df %>%
filter(!is.na(.data[[x$misclass_col]])) %>%
count(.data[[x$misclass_col]]) %>%
as_tibble
out[[city]]$misclass_filter_uniq_len <- df %>%
filter(!.data[[x$misclass_col]] %in% x$misclass_filter) %>%
group_by(.data[[x$misclass_col]]) %>%
summarize(len = len_func(sum(st_length(.data[[x$geom_col]]), na.rm = TRUE))) %>%
as_tibble
# Apply misclass filter
df <- df %>%
filter(.data[[x$misclass_col]] %in% x$misclass_filter)
# Set misclass filter status
out[[city]]$misclass_filter_applied <- TRUE
}
# Count/len misclass filter
out[[city]]$misclass_filter_n <- nrow(df) - out[[city]]$noverify_filter_nx
out[[city]]$misclass_filter_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE)) - out[[city]]$noverify_filter_lenx
# Count/len affected by misclass filter
out[[city]]$misclass_filter_nx <- out[[city]]$elig_n - out[[city]]$misclass_filter_n
out[[city]]$misclass_filter_lenx <- out[[city]]$elig_len - out[[city]]$misclass_filter_len
# Count/len eligible
out[[city]]$incl_n <- nrow(df) - out[[city]]$noverify_filter_nx
out[[city]]$incl_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE)) - out[[city]]$noverify_filter_lenx
# Save filtered data
out[[city]]$data_filter <- df
}
return(out)
}Diagram the segment inclusion criteria results.
This function draws a flow diagram of overall methods for segment
inclusion criteria using output from filter_criteria.
#' Diagram the segment inclusion criteria results
#'
#' This function draws a flow diagram of overall methods for segment inclusion criteria using output from \code{\link{filter_criteria}}.
#'
#' @param criteria_data A list of lists in the structure of the output from \code{\link{filter_criteria}}.
#' @param note A note (char) to display at the end of the diagram.
#' @return A \code{\link[DiagrammeR]{grViz}} object.
#' @export
#'
diag_criteria <- function(
criteria_data,
note = "*Denotes segments misclassified as an ineligible type (off-street path, shared road, or inactive temporary infrastructure)<br/>**Local Street Bikeways (LSB) were included but not screened or verified as they did not fit the Can-BICS definitions"
) {
# Diag settings
diag_settings <- "
rankdir = LR
node[
shape = box,
width = 2.75,
height = 1.65,
style = filled,
fillcolor = white,
penwidth = 1.5,
fontname = 'Arial'
]
edge[
arrowhead = vee,
arrowtail = vee
]
layout = neato
"
# Top header row
row_top <- "
open_data[
label = 'Open Data',
height = 0.5,
fillcolor = '#d7e9fe',
pos = '0,1!'
]
elig_data[
label = 'Eligible Segments',
height = 0.5,
fillcolor = '#d7e9fe',
pos = '3.25,1!'
]
incl_data[
label = 'Inclusions',
height = 0.5,
fillcolor = '#d7e9fe',
pos = '6.5,1!'
]
"
# Create template for row addition
row_temp <- "
open{i}[
label = <<b>{city}</b><br/>N = {open_n} Segments<br/><i>({open_len})<br/>Downloaded: {open_date}</i>>,
pos = '0,{y}!'
]
elig{i}[
label = <n = {elig_n} Segments<br/>({elig_len})<i><br/><b>Exclusions</b>{elig_inelig}{elig_dup}{elig_poly}</i>>,
pos = '3.25,{y}!'
]
incl{i}[
label = <n = {incl_n} Segments<br/><i>({incl_len}){noverify}<br/><b>Exclusions</b>{incl_miss}{incl_dup}</i>>,
pos = '6.5,{y}!'
]
open{i} -> elig{i} -> incl{i}
"
# Generate row additions per city
y <- -0.21
y_gap <- 1.85
row_adds <- ""
for (i in 1:length(criteria_data)) {
# Vars per city
criteria <- criteria_data[[i]]
# Generate geom filter dup info
elig_dup <- ""
if (criteria$geom_filter_dup_nx > 0) {
elig_dup <- glue(
"<br/>Duplicates: n = {n}",
n = criteria$geom_filter_dup_nx
)
}
# Generate geom filter null info
elig_poly <- ""
if (criteria$geom_filter_null_nx > 0) {
elig_poly <- glue(
"<br/>No Polyline Data: n = {n}",
n = criteria$geom_filter_null_nx
)
}
# Generate inelig info
elig_inelig <- glue(
"<br/>Ineligible: n = {n}",
n = criteria[["infra_filter_nx"]] + criteria[["status_filter_nx"]] + criteria[["road_filter_nx"]]
)
# Generate noverify info
noverify <- ""
if (criteria$noverify_filter_applied) {
noverify <- glue(
"<br/>**Screened: n = {n}<br/>**Not screened: n = {nx}",
n = criteria$elig_n,
nx = criteria$noverify_filter_nx
)
}
# Generate incl info
incl_miss <- glue(
"<br/>*Misclassifications: n = {n}",
n = criteria[["misclass_filter_nx"]]
)
# Road length unit
if ("geom_unit" %in% names(criteria)) {
len_unit <- criteria$geom_unit
} else {
len_unit = "meters"
}
# Generate single row addition
row_adds <- paste0(row_adds, glue(
row_temp,
i = i,
y = y,
city = str_to_title(criteria[["city"]]),
open_n = criteria[["data_n"]],
open_len = paste(round(criteria[["data_len"]], 1), len_unit),
open_date = criteria[["data_date"]],
elig_n = criteria$elig_n + criteria$noverify_filter_nx,
elig_len = paste(round(criteria[["elig_len"]], 1), len_unit),
elig_inelig = elig_inelig,
elig_dup = elig_dup,
elig_poly = elig_poly,
incl_n = criteria[["incl_n"]] + criteria$noverify_filter_nx,
incl_len = paste(round(criteria[["incl_len"]] + criteria$noverify_filter_lenx, 1), len_unit),
incl_miss = incl_miss,
incl_dup = "",
noverify = noverify
))
# Move row below
y <- y - y_gap
}
# Filter and screening lines
line_filter <- glue("
filter1[
label = 'Filtering',
height = 0.25,
shape = plaintext,
style='', pos = '1.6,1.425!'
]
filter2[
style = invis,
pos = '1.6,{y}!'
]
filter1 -> filter2 [style = dashed, dir = none, color = '#b0b0b0']
", y = y - -0.96)
line_screen <- glue("
screen1[
label = 'Screening',
height = 0.25,
shape = plaintext,
style='', pos = '4.85,1.425!'
]
screen2[
style = invis,
pos = '4.85,{y}!'
]
screen1 -> screen2 [style = dashed, dir = none, color = '#b0b0b0']
", y = y - -0.96)
# Bottom note
note_bottom <- glue("
note[
label=<<i>{text}</i>>,
style = '',
shape = plaintext,
fontsize = 12,
pos = '3.25,{y}!'
]
", text = note, y = y - -0.69)
# Generate graphviz diag
out <- grViz(paste0(
"digraph {",
diag_settings,
row_top,
row_adds,
line_filter,
line_screen,
note_bottom,
"}"
))
return(out)
}Diagram the segment inclusion criteria results in detail.
This function draws a flow diagram of detailed methods for segment
inclusion criteria using output from filter_criteria.
#' Diagram the segment inclusion criteria results in detail
#'
#' This function draws a flow diagram of detailed methods for segment inclusion criteria using output from \code{\link{filter_criteria}}.
#'
#' @param criteria_data A list of lists in the structure of the output from \code{\link{filter_criteria}}.
#' @param city The city (char) to create the diagram for. If `NULL`, this function produces a list of diagrams where keys are the city name and values are the diagrams.
#' @param out_render Set to TRUE to render the diagram and return \code{\link[DiagrammeR]{grViz}} objects or FALSE to return the text used to generate the diagram.
#' @return A list of \code{\link[DiagrammeR]{grViz}} objects if `city` is `NULL`, or a single \code{\link[DiagrammeR]{grViz}} if `city` is provided. The \code{\link[DiagrammeR]{grViz}} objects become text (char) if `out_render` is `FALSE`.
#' @export
#'
diag_criteria_details <- function(criteria_data, city = NULL, out_render = TRUE) {
# Filter for city if avail
if (!is.null(city)) {
criteria_data <- criteria_data[sapply(criteria_data, function (x) x$city == city)]
}
# Generate diagrams for each city
out <- list()
for (i in 1:length(criteria_data)) {
# Diag vars
criteria <- criteria_data[[i]]
x_edge <- -4
# Diag settings
diag_settings <- "
rankdir = TB
node[
shape = box
width = 10
height = 1.8
style = filled
fillcolor = white
penwidth = 1.5
fontsize = 16
fontname = 'Arial'
margin = 0.25
]
edge[
arrowhead = vee,
arrowtail = vee
]
layout = neato
"
# Step 1 identification
s1 <- glue("
id_title[
label = <<b>Identification</b>>
pos = '-8.5,0!'
width = 2
height = 1.9
fillcolor = '#d7e9fe'
style = 'rounded,filled'
]
id[
label = 'Shapefile from: {url}\\lDownloaded: {date}\\lN = {n} Segments\\l'
pos = '0,0!'
width = 14
]
id_top[
style = invis
pos = '{x},0!'
]
id_bot[
style = invis
pos = '{x},-2.25!'
]
id_top -> id_bot
",
url = criteria$data_url,
date = criteria$data_date,
n = criteria$data_n,
x = x_edge
)
# Step 2 vars
fi <- 0
y <- -0
s2 <- ""
# Step 2 filtering infra
if (criteria$infra_filter_applied) {
fi <- fi + 1
y <- y - 2.25
s2 <- glue("
{s2}
filter{fi}[
label = 'Filter for Dedicated Cycling Infrastructure\\l{column} in {filter}\\l(n = {n})\\l'
pos = '-2,{y}!'
]
filter{fi}x[
label = 'Segments Excluded\\l(n = {nx})\\l'
pos = '5.5,{y}!'
width = 3
]
filter{fi} -> filter{fi}x
filter{fi}_top[
style = invis
pos = '{x},{y}!'
]
filter{fi}_bot[
style = invis
pos = '{x},{y - 2.25}!'
]
filter{fi}_top -> filter{fi}_bot
",
column = criteria$infra_col,
filter = str_replace_all(
str_wrap(
paste0(
criteria$infra_filter,
collapse = ", "
),
width = 83
),
"[\r\n]",
"\\\\l"
),
n = criteria$infra_filter_n,
nx = criteria$infra_filter_nx,
fi = fi,
y = y,
x = x_edge,
s2 = s2
)
}
# Step 2 filtering road
if (criteria$road_filter_applied) {
fi <- fi + 1
y <- y - 2.25
s2 <- glue("
{s2}
filter{fi}[
label = 'Filter for Infrastructure Located on Roadway\\l{column} != {filter}\\l(n = {n})\\l'
pos = '-2,{y}!'
]
filter{fi}x[
label = 'Segments Excluded\\l(n = {nx})\\l'
pos = '5.5,{y}!'
width = 3
]
filter{fi} -> filter{fi}x
filter{fi}_top[
style = invis
pos = '{x},{y}!'
]
filter{fi}_bot[
style = invis
pos = '{x},{y - 2.25}!'
]
filter{fi}_top -> filter{fi}_bot
",
column = criteria$road_col,
filter = str_replace_all(
str_wrap(
paste0(
criteria$road_filter,
collapse = ", "
),
width = 83
),
"[\r\n]",
"\\\\l"
),
n = criteria$road_filter_n,
nx = criteria$road_filter_nx,
fi = fi,
y = y,
x = x_edge,
s2 = s2
)
}
# Step 2 filtering status
if (criteria$status_filter_applied) {
fi <- fi + 1
y <- y - 2.25
s2 <- glue("
{s2}
filter{fi}[
label = 'Filter for Active Infrastructure Status\\l{column} != {filter}\\l(n = {n})\\l'
pos = '-2,{y}!'
]
filter{fi}x[
label = 'Segments Excluded\\l(n = {nx})\\l'
pos = '5.5,{y}!'
width = 3
]
filter{fi} -> filter{fi}x
filter{fi}_top[
style = invis
pos = '{x},{y}!'
]
filter{fi}_bot[
style = invis
pos = '{x},{y - 2.25}!'
]
filter{fi}_top -> filter{fi}_bot
",
column = criteria$status_col,
filter = str_replace_all(
str_wrap(
paste0(
criteria$status_filter,
collapse = ", "
),
width = 83
),
"[\r\n]",
"\\\\l"
),
n = criteria$status_filter_n,
nx = criteria$status_filter_nx,
fi = fi,
y = y,
x = x_edge,
s2 = s2
)
}
# Step 2 filtering null geom
if (criteria$geom_filter_null_applied) {
fi <- fi + 1
y <- y - 2.25
s2 <- glue("
{s2}
filter{fi}[
label = 'Filter for Null Geometry\\l{column} is not null\\l(n = {n})\\l'
pos = '-2,{y}!'
]
filter{fi}x[
label = 'Segments Excluded\\l(n = {nx})\\l'
pos = '5.5,{y}!'
width = 3
]
filter{fi} -> filter{fi}x
filter{fi}_top[
style = invis
pos = '{x},{y}!'
]
filter{fi}_bot[
style = invis
pos = '{x},{y - 2.25}!'
]
filter{fi}_top -> filter{fi}_bot
",
column = criteria$geom_col,
n = criteria$geom_filter_null_n,
nx = criteria$geom_filter_null_nx,
fi = fi,
y = y,
x = x_edge,
s2 = s2
)
}
# Step 2 filtering dup geom
if (criteria$geom_filter_dup_applied) {
fi <- fi + 1
y <- y - 2.25
s2 <- glue("
{s2}
filter{fi}[
label = 'Filter for Duplicate Geometry\\l{column} is not duplicated\\l(n = {n})\\l'
pos = '-2,{y}!'
]
filter{fi}x[
label = 'Segments Excluded\\l(n = {nx})\\l'
pos = '5.5,{y}!'
width = 3
]
filter{fi} -> filter{fi}x
filter{fi}_top[
style = invis
pos = '{x},{y}!'
]
filter{fi}_bot[
style = invis
pos = '{x},{y - 2.25}!'
]
filter{fi}_top -> filter{fi}_bot
",
column = criteria$geom_col,
n = criteria$geom_filter_dup_n,
nx = criteria$geom_filter_dup_nx,
fi = fi,
y = y,
x = x_edge,
s2 = s2
)
}
# Step 2 filtering
s2 <- glue("
filter_title[
label = <<b>Filtering</b>>
pos = '-8.5,{y}!'
width = 2
height = {h}
fillcolor = '#d7e9fe'
style = 'rounded,filled'
]
{s2}
",
h = (fi * 2.1),
fi = fi,
y = y + if (fi == 1) 0 else (((fi -1) / 2) * 2.25),
s2 = s2
)
# Step 3 eligible
y <- y - 2.25
s3 <- glue("
elig_title[
label = <<b>Eligible</b>>
pos = '-8.5,{y}!'
width = 2
height = 1.9
fillcolor = '#d7e9fe'
style = 'rounded,filled'
]
elig[
label = 'Segments Included for Data Entry and Screening\\l(n = {n})\\l'
pos = '0,{y}!'
width = 14
]
elig_top[
style = invis
pos = '{x},{y}!'
]
elig_bot[
style = invis
pos = '{x},{y - 2.25}!'
]
elig_top -> elig_bot
",
n = criteria$elig_n + criteria$noverify_filter_nx,
y = y,
x = x_edge
)
# Step 4 Screening
s4 <- ""
# Step 4 title
y <- y - 2.65
s4 <- glue("
screen_title[
label = <<b>Screening</b>>
pos = '-8.5,{y}!'
width = 2
height = 2.55
fillcolor = '#d7e9fe'
style = 'rounded,filled'
]
",
n = criteria$misclass_n,
y = y
)
# Step 4 noverify
misclass_noverify <- ""
if (criteria$noverify_filter_nx > 0) {
misclass_noverify <- glue(
"{n} screened, {nx} not screened\\l",
n = criteria$misclass_filter_n,
nx = criteria$noverify_filter_nx
)
}
# Step 4 misclass
s4 <- glue("
{s4}
screen[
label = 'Exclusion of Misclassifications and\\lDuplicates following Screening\\l{column} != {filter}\\l{noverify}(n = {n})\\l'
pos = '-4.5,{y}!'
width = 5
height = 2.5
]
screenx[
label = '{misclass}'
pos = '3,{y}!'
width = 7.95
height = 2.5
]
screen -> screenx
screen_top[
style = invis
pos = '{x},{y - 0.35}!'
]
screen_bot[
style = invis
pos = '{x},{y - 2.75}!'
]
screen_top -> screen_bot
",
column = criteria$misclass_col,
filter = str_replace_all(
str_wrap(
paste0(
criteria$misclass_filter,
collapse = ", "
),
width = 83
),
"[\r\n]",
"\\\\l"
),
n = criteria$misclass_filter_n + criteria$noverify_filter_nx,
noverify = misclass_noverify,
misclass = paste0(
"Misclassifications: ",
criteria$misclass_filter_uniq_n[[1]],
" (n = ",
criteria$misclass_filter_uniq_n[[2]],
")\\l",
collapse = ""
),
y = y,
x = x_edge,
s4 = s4
)
# Step 5 noverify
incl_noverify <- ""
if (criteria$noverify_filter_nx > 0) {
incl_noverify <- glue(
"{n} verified, {nx} not verified\\l",
n = criteria$incl_n,
nx = criteria$noverify_filter_nx
)
}
# Step 5 Inclusions
y <- y - 2.75
s5 <- glue("
incl_title[
label = <<b>Inclusions</b>>
pos = '-8.5,{y}!'
width = 2
height = 1.9
fillcolor = '#c8e29d'
style = 'rounded,filled'
]
incl[
label = '{verified}Inclusions\\l{noverify}(n = {n})\\l'
pos = '0,{y}!'
width = 14
]
",
verified = if (criteria$noverify_filter_nx > 0) "Verified and Non-verified " else "Verified ",
noverify = incl_noverify,
n = criteria$incl_n + criteria$noverify_filter_nx,
y = y
)
# Combine steps
out[[criteria$city]] <- paste0(
"digraph {\n",
diag_settings,
"\n",
s1,
"\n",
s2,
"\n",
s3,
"\n",
s4,
"\n",
s5,
"\n",
"}"
)
}
# Return diagrams or single diagram if city is given
out <- if (length(out) > 1) out else out[[1]]
out <- if (out_render) grViz(out) else out
return(out)
}Prepare Infrastructure Changes Data for Mapping.
#' Prepare Infrastructure Changes Data for Mapping
#'
#' This function prepares city data in a list format for mapping infrastructure changes since a target year.
#'
#' @param map_list A list of lists, where each list contains the following structure defining the city mapping data and settings:
#' \itemize{
#' \item \code{title}: the title (char) of the main city map.
#' \item \code{data}: the sf data.frame containing road segments of the install, upgrade1, and upgrade2 years and types (required).
#' \item \code{downtown_bbox}: a vector (numeric) containing the coordinates of the downtown region's bounding box in xmin, ymin, xmax, and ymax respectively.
#' }
#' @param year_since The year (numeric) since to examine infrastructure changes.
#'
#' @return A list of lists, where each list has keys and values from \code{map_list}, and the following additional keys:
#' \itemize{
#' \item \code{data_map}: a sf data.frame with an additional `changes` column indicating the infrastructure changes since the target `year_since`.
#' \item \code{data_bbox}: a sf data.frame of the bounding box of `data_map`.
#' \item \code{data_downtown}: Same as `data_map` except for the downtown region indicated by `downtown_bbox`.
#' \item \code{data_downtown_bbox}: a sf data.frame of the bounding box of `data_downtown`.
#' \item \code{map_colors}: the colors (char) for each of the infrastructure change categories.
#' \item \code{map_column}: the column name (char) to be mapped
#' \item \code{downtown_title}: the name (char) of the downtown subset map
#' }
#' @export
#'
prep_infra <- function(
map_list,
year_since = settings$infra_changes_year
) {
# Create color palette
colors <- c("green", "orange", "gray")
names(colors) <- c(
glue("New Infrastructure Since Jan. {year}", year = year_since), # green
glue("Upgraded Infrastructure Since Jan. {year}", year = year_since), # orange
"Unchanged Infrastructure" # gray
)
# Generate maps per city
out <- map_list
for (i in 1:length(map_list)) {
# Get city vars
city <- map_list[[i]]
# Create downtown title if not given
if (!"downtown_title" %in% names(city)) {
id <- names(map_list)[[i]]
downtown_title <- glue(
"Downtown {id}",
id = str_to_title(id)
)
} else {
downtown_title <- city$downtown_title
}
# Create col to identify infra changes
map_data <- city$data %>%
mutate(
changes = case_when(
(
!is.na(verify_upgrade1_type) &
!is.na(verify_upgrade1_year) &
verify_upgrade1_year >= year_since
) | (
!is.na(verify_upgrade2_type) &
!is.na(verify_upgrade2_year) &
verify_upgrade2_year >= year_since
) ~ glue(
"Upgraded Infrastructure Since Jan. {year}",
year = year_since
),
!is.na(verify_install_type) &
!is.na(verify_install_year) &
verify_install_year >= year_since ~
glue(
"New Infrastructure Since Jan. {year}",
year = year_since
),
.default = "Unchanged Infrastructure"
)
)
# Create bounding box for city
city_bbox <- st_as_sfc(
st_bbox(city$data, crs = 4326)
)
# Create bounding box for downtown region
downtown_bbox <- st_as_sfc(
st_bbox(city$downtown_bbox, crs = 4326)
)
# Subset data for downtown region
submap_data <- map_data %>% st_crop(downtown_bbox)
# Add prep data to cities list
out[[i]]$data_map <- map_data
out[[i]]$data_bbox <- city_bbox
out[[i]]$data_downtown <- submap_data
out[[i]]$data_downtown_bbox <- downtown_bbox
out[[i]]$map_colors <- colors
out[[i]]$map_column <- "changes"
out[[i]]$downtown_title <- downtown_title
}
return(out)
}Maps Infrastructure Changes.
Creates maps of infrastructure changes since a certain year for each
city and their downtown region using output from
prep_map.
#' Map Infrastructure Changes
#'
#' This function maps infrastructure changes since a target year.
#'
#' @inheritParams prep_infra
#'
#' @return A `patchwork` object of `ggplot` objects combined together to form the multiple maps in arranged on a layout.
#' @export
#'
map_infra <- function(
map_list,
year_since = settings$infra_changes_year
) {
# Prepare data for maps
cities_prep <- prep_infra(map_list)
# Generate maps per city
out <- list()
for (i in 1:length(cities_prep)) {
# Get city vars
city <- cities_prep[[i]]
id <- names(cities_prep)[[i]]
# Create base map for city and downtown map
base_map <- ggplot() +
annotation_map_tile(
zoomin = 1,
type = "cartolight",
cachedir = "../data/cache"
) +
annotation_north_arrow(
width = unit(0.2, "cm"),
height = unit(0.5, "cm"),
location = "br"
) +
annotation_scale(
location = "bl",
style = "ticks"
) +
scale_color_manual(values = city$map_colors) +
fixed_plot_aspect(ratio = 1.5) +
theme_void()
# Generate city map
out[[id]] <- base_map +
ggtitle(city$title) +
layer_spatial(city$data_map, aes(color = .data[[city$map_column]])) +
layer_spatial(city$data_downtown_bbox, color = "red", fill = NA, linewidth = 0.5) +
guides(colour = guide_legend(
override.aes = list(linewidth = 3)
))
# Generate downtown map
out[[paste0(id, "_downtown")]] <- base_map +
ggtitle(city$downtown_title) +
layer_spatial(city$data_downtown, aes(color = .data[[city$map_column]])) +
guides(color = "none")
}
# Combine maps into single layout
out <- wrap_plots(out, ncol = 2) +
plot_layout(guides = "collect") &
theme(
legend.position = "bottom",
legend.title = element_blank(),
plot.title = element_text(
size = 8,
margin = margin(t = 8, b = -20, l = 8)
),
plot.margin = margin(t = 8, l = 0, r = 0),
panel.border = element_rect(
colour = "gray20",
fill = NA,
linewidth = 0.5
)
)
return(out)
}Maps Infrastructure Changes in Detail.
Creates enlarged maps of infrastructure changes since a certain year for each city and their downtown region.
#' Map Infrastructure Changes in Detail
#'
#' This function creates enlarged maps of infrastructure changes since a target year.
#'
#' @inheritParams prep_infra
#' @param city_key They city key (char) to map from `map_list`. If `NULL`, maps all cities and returns a list, otherwise if given, returns an item from the list (required).
#' @param map_inset Set to `TRUE` to create an inset map of the downtown region or `FALSE` to omit the inset map.
#' @param map_inset_position A named vector (numeric) containing four values indicating the position of the inset map with the names being `left`, `bottom`, `right`, and `top` aligned to the `full` area. See \link[patchwork]{inset_element}.
#'
#' @param map_ratio The aspect ratio (numeric) of the map.
#' @param map_inset_ratio The aspect ratio (numeric) of the subset map.
#' @return A list of `patchwork` object of `ggplot` objects combined together to form the enlarged maps, where the keys are the names of the cities as in `map_list`. If `city_key` is provided, returns only one of the items from this list.
#' @export
#'
map_infra_detail <- function(
map_list,
city_key = NULL,
map_inset = TRUE,
map_inset_position = c(
left = 0.6,
bottom = 0.6,
right = 1,
top = 1
),
map_ratio = 1.75,
map_inset_ratio = 2,
year_since = settings$infra_changes_year,
...
) {
# Only map one city if given
if (!is.null(city_key)) {
map_list <- list(map_list[[city_key]])
names(map_list) <- city_key
}
# Prepare data for maps
cities_prep <- prep_infra(map_list)
# Generate enlarged maps per city
out <- list()
for (i in 1:length(cities_prep)) {
# Get city vars
city <- cities_prep[[i]]
id <- names(cities_prep)[[i]]
# Create base map for city and downtown map
base_map <- ggplot() +
annotation_map_tile(
zoomin = 1,
type = "cartolight",
cachedir = "../data/cache"
) +
scale_color_manual(values = city$map_colors) +
theme_void()
# Generate city map
if ("map_ratio" %in% city) {
map_ratio <- city$map_ratio
}
city_map <- base_map +
fixed_plot_aspect(ratio = map_ratio) +
annotation_north_arrow(
width = unit(0.2, "cm"),
height = unit(0.5, "cm"),
location = "br"
) +
annotation_scale(
location = "bl",
style = "ticks"
) +
layer_spatial(city$data_map, aes(color = .data[[city$map_column]])) +
guides(colour = guide_legend(
override.aes = list(linewidth = 3)
)) +
theme(
legend.position = "bottom",
legend.title = element_blank(),
panel.border = element_rect(
colour = "gray20",
fill = NA,
linewidth = 0.5
)
)
# Add inset map as downtown region
map_inset <- if ("map_inset" %in% names(city)) city$map_inset else map_inset
if (map_inset) {
# Generate downtown map
if ("map_inset_ratio" %in% city) {
map_inset_ratio <- city$map_inset_ratio
}
downtown_map <- base_map +
fixed_plot_aspect(ratio = map_inset_ratio) +
layer_spatial(city$data_downtown, aes(color = .data[[city$map_column]])) +
guides(color = "none") +
annotation_scale(
location = "tl",
style = "ticks"
) +
theme(
panel.border = element_rect(
colour = "black",
fill = NA,
linewidth = 0.75
)
)
# Create final map with inset
if ("map_inset_position" %in% names(city)) {
map_inset_position <- city$map_inset_position
}
out[[id]] <- city_map + inset_element(
downtown_map,
left = map_inset_position[["left"]],
bottom = map_inset_position[["bottom"]],
right = map_inset_position[["right"]],
top = map_inset_position[["top"]],
align_to = "full"
)
} else {
# No inset for final map
out[[id]] <- city_map
}
}
# Return list of all city maps or single map if city_key given
if (!is.null(city_key)) {
out <- out[[city_key]]
}
return(out)
}Load raw data provided by Konrad Samsel.
Note: Only segments with verified installations are shown (n = 745 of 3666).